import os
import sys
sys.path.append(os.getcwd())
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.figure_factory as ff
from sklearn.linear_model import LinearRegression, Lasso, LassoCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
import gc
import warnings
# location of the student data CSV (hosted on Dropbox)
url = 'https://www.dropbox.com/s/iasfmrfdzafjkyq/student_data.csv?dl=1'
# make sure automatic garbage collection is switched on
gc.enable()
# suppress every warning for the remainder of the session
warnings.filterwarnings('ignore')
def get_scatter(xval: pd.Series, yval: pd.Series, n: str):
    """
    Build a single Plotly scatter trace.

    xval: values plotted on the x axis
    yval: values plotted on the y axis
    n: name given to the trace
    Returns the ``go.Scatter`` trace (not a full figure).
    """
    trace_options = {'x': xval, 'y': yval, 'name': n}
    return go.Scatter(**trace_options)
def time_series(df: pd.DataFrame, ttl: str):
    """
    Plot every column of ``df`` as its own trace against ``df.index``.

    df: frame whose index supplies the x values and whose columns are the series
    ttl: prefix of the figure title; ' Monthly Cumulative Portfolio Value'
         is always appended to it
    Returns the assembled ``go.Figure``.
    """
    layout_options = {
        'title': ttl + ' Monthly Cumulative Portfolio Value',
        'xaxis_title': 'Date',
        'yaxis_title': 'Portfolio Value ($1 on 1987-03-31)',
    }
    chart = go.Figure()
    # one trace per column, named after the column
    for column in df.columns:
        trace = get_scatter(df.index, df[column], column)
        chart.add_trace(trace)
    chart.update_layout(**layout_options)
    return chart
def plot_distribution(data: list, labels: list, colors: list, title: str):
    """
    Draw a distribution plot (histogram plus fitted curve, no rug) for several samples.

    data: list of samples, one entry per distribution
    labels: legend label for each sample
    colors: color for each sample
    title: text placed in front of ' Distribution' in the figure title
    Returns the figure produced by ``ff.create_distplot``.
    """
    # same bin width (0.05) for every sample
    bin_sizes = [0.05] * len(data)
    dist_fig = ff.create_distplot(
        data,
        labels,
        bin_size=bin_sizes,
        colors=colors,
        show_rug=False,
        show_curve=True,
    )
    figure_title = '{} Distribution'.format(title)
    dist_fig.update(layout_title_text=figure_title)
    return dist_fig
# read data from Dropbox and remove extra identifiers
msf = pd.read_csv(url).drop(labels=['PERMNO', 'gvkey', 'COMNAM', 'TICKER', 'SICCD'], axis=1)
# drop the first remaining column
# (presumably a leftover row-index column written into the CSV -- TODO confirm)
msf = msf.loc[:, msf.columns[1:]].copy()
# release the intermediate frames created while loading
gc.collect()
# get datetime objects and Market Value of each firm
# both date columns are parsed with the YYYYMMDD format
msf['date'] = pd.to_datetime(msf['date'], format='%Y%m%d')
msf['nextmonth'] = pd.to_datetime(msf['nextmonth'], format='%Y%m%d')
# market value = price * shares outstanding; it is appended as the LAST column,
# which the positional slices below depend on
msf['mktval'] = msf['PRC'] * msf['SHROUT']
# store predictor variable names
# positional slice: every column from position 7 up to (but excluding) 'mktval'
predictor_names = msf.columns[7:-1].tolist()
# store prediction name
# the regression target is the last column, i.e. the 'mktval' column added above
prediction_name = msf.columns[-1]
# notebook preview of the prepared frame
msf.head()
| date | CUSIP | PRC | SHROUT | RET | nextmonth | next_Ret | atq | dvpq | seqq | ... | doq_MA4 | nopiq_MA4 | ibq_MA4 | txtq_MA4 | niq_MA4 | cheq_MA4 | saleq_MA4 | dvy_MA4 | piq_MA4 | mktval | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1987-03-31 | 00036110 | 33.50 | 9099.0 | 0.107438 | 1987-04-30 | -0.111940 | 228.106 | 0.000 | 130.427 | ... | 0.0 | 0.44225 | 3.56150 | 2.85750 | 3.56150 | 3.78525 | 70.67700 | 2.58625 | 6.41900 | 304816.5 |
| 1 | 1987-03-31 | 10304310 | 38.25 | 9158.0 | 0.145522 | 1987-04-30 | -0.049020 | 163.403 | 0.000 | 112.860 | ... | 0.0 | 0.66500 | 5.48775 | 5.37000 | 5.48775 | 26.73400 | 48.04150 | 2.54975 | 11.11825 | 350293.5 |
| 2 | 1987-03-31 | 89051610 | 61.25 | 4418.0 | -0.020349 | 1987-04-30 | -0.089796 | 79.886 | 0.000 | 64.583 | ... | 0.0 | 0.37900 | 3.20050 | 3.22475 | 3.20050 | 25.79550 | 27.85075 | 1.06600 | 6.42525 | 270602.5 |
| 3 | 1987-03-31 | 89109210 | 31.00 | 6633.0 | -0.038610 | 1987-04-30 | -0.032258 | 261.411 | 0.296 | 74.113 | ... | 0.0 | -0.10925 | 4.20075 | 2.67900 | 4.20075 | 7.61300 | 112.99875 | 2.64300 | 6.87975 | 205623.0 |
| 4 | 1987-03-31 | 89190610 | 26.00 | 15916.0 | -0.223881 | 1987-04-30 | -0.028846 | 27.390 | 0.000 | 24.649 | ... | 0.0 | 0.06650 | 1.30950 | 1.29375 | 1.30950 | 4.86050 | 9.08825 | 0.00000 | 2.60325 | 413816.0 |
5 rows × 35 columns
def scale_data(df: pd.DataFrame):
    """
    Standardize the columns of ``df`` with a freshly fitted ``StandardScaler``.

    df: frame of numeric features
    Returns the array produced by ``fit_transform`` (the scaler itself is discarded).
    """
    return StandardScaler().fit_transform(df)
def get_regression(df: pd.DataFrame, predictors: list, prediction: str):
    """
    Fit an ordinary least squares model and return its in-sample fitted values.

    df: frame holding both the predictor columns and the target column
    predictors: names of the explanatory columns
    prediction: name of the target column
    Returns the array of predictions for the same rows the model was fitted on.
    """
    features = df.loc[:, predictors]
    target = df.loc[:, prediction]
    # features are used unscaled; fit() returns the fitted estimator itself
    model = LinearRegression().fit(features, target)
    return model.predict(features)
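The mispricing signal is calculated as: $$ M_{j,t} = \frac{FairValuePrediction_{j,t} - MarketValue_{j,t}}{MarketValue_{j,t}} $$ where $M_{j,t}$ is the mispricing signal of firm $j$ in month $t$, $FairValuePrediction_{j,t}$ is the fair value predicted by the model for that firm and month, and $MarketValue_{j,t}$ is the firm's market value (price times shares outstanding).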
# get monthly fair value for each firm through simple OLS
# one cross-sectional regression is fitted per 'date' group; the fitted values of
# each group are wrapped in a Series named 'linear_fvp'
linear_fair_value = pd.DataFrame(msf.groupby('date').apply(
lambda x: pd.Series(
get_regression(x, predictor_names, prediction_name),
name='linear_fvp'
))).reset_index()
# add results to main data frame
# NOTE(review): the assignment aligns on the integer index created by reset_index();
# this presumably relies on msf being sorted by date with a default RangeIndex -- confirm
msf['linear_fvp'] = linear_fair_value['linear_fvp']
# calculate mispricing signal
# relative gap between predicted fair value and observed market value
msf['linear_sig'] = (msf['linear_fvp'] - msf['mktval']) / msf['mktval']
# run LASSO Regression to extract relevant features
def feature_selection(df: pd.DataFrame, predictors: list, prediction):
    """
    Select the predictors that a cross-validated LASSO keeps in the model.

    The penalty is chosen with 10-fold ``LassoCV``; a ``Lasso`` with that penalty is
    then fitted on the same data and every predictor whose coefficient is non-zero
    is returned.

    df: frame holding both the predictor columns and the target column
    predictors: names of the candidate explanatory columns
    prediction: name of the target column
    Returns a list with the names of the selected predictors (possibly empty if
    the penalty shrinks every coefficient to zero).
    """
    X = df.loc[:, predictors]
    y = df.loc[:, prediction]
    # NOTE(review): the `normalize` argument was deprecated in scikit-learn 1.0 and
    # removed in 1.2 -- confirm the installed version still accepts it.
    lasso_cv = LassoCV(alphas=None, cv=10, max_iter=10000, normalize=True)
    # fit Cross Validation to get optimal alpha
    lasso_cv.fit(X, y)
    # fit Lasso with optimal penalty
    lasso = Lasso(alpha=lasso_cv.alpha_, max_iter=10000, normalize=True)
    lasso.fit(X, y)
    # LASSO drops a feature by shrinking its coefficient to exactly zero, so a
    # feature is selected when its coefficient is non-zero; negative coefficients
    # are kept too (the previous `> 0` test discarded them).
    selected_mask = lasso.coef_ != 0
    selected_features = list(np.array(predictors)[selected_mask])
    return selected_features
# run OLS post-LASSO feature selection
def ols_post_lasso(df: pd.DataFrame, predictors: list, prediction: str):
    """
    Two-step estimator: pick features with LASSO, then fit plain OLS on them.

    df: frame holding both the predictor columns and the target column
    predictors: names of the candidate explanatory columns
    prediction: name of the target column
    Returns the in-sample OLS predictions computed with the selected features only.
    """
    # step 1: keep only the features chosen by feature_selection
    # step 2: regress the target on those features and return the fitted values
    return get_regression(
        df,
        feature_selection(df, predictors, prediction),
        prediction,
    )
The mispricing signal is calculated as before, except that the relevant explanatory features are first selected with a Least Absolute Shrinkage and Selection Operator (LASSO) regression.
# calculate predicted fair value with post-LASSO regression
# per 'date' group: LASSO feature selection followed by OLS on the kept features;
# the fitted values of each group are wrapped in a Series named 'post_lasso_fvp'
lasso_fair_value = pd.DataFrame(msf.groupby('date').apply(
lambda x: pd.Series(
ols_post_lasso(x, predictor_names, prediction_name),
name='post_lasso_fvp'
))).reset_index()
# calculate OLS post-LASSO mispricing signal
# NOTE(review): the assignment aligns on the integer index created by reset_index();
# this presumably relies on msf being sorted by date with a default RangeIndex -- confirm
msf['lasso_fvp'] = lasso_fair_value['post_lasso_fvp']
# relative gap between predicted fair value and observed market value
msf['lasso_sig'] = (msf['lasso_fvp'] - msf['mktval']) / msf['mktval']
def get_random_forest(df: pd.DataFrame, predictors: list, prediction: str):
    """
    Fit a random forest regressor and return its in-sample predictions.

    df: frame holding both the predictor columns and the target column
    predictors: names of the explanatory columns
    prediction: name of the target column
    Returns the array of predictions for the same rows the forest was fitted on.
    """
    forest_settings = {
        'n_estimators': 1000,
        'random_state': 42,  # fixed seed so repeated runs give the same forest
        'min_samples_leaf': 20,
        'max_depth': 100,
        'n_jobs': -1,  # use every available core
    }
    # NOTE: scaling can possibly be removed
    features = scale_data(df.loc[:, predictors])
    target = df.loc[:, prediction]
    forest = RandomForestRegressor(**forest_settings)
    forest.fit(features, target)
    return forest.predict(features)
# calculate predicted fair value with Random Forest regression
# one forest is fitted per 'date' group; the fitted values of each group are
# wrapped in a Series named 'rf_fvp'
rf_fair_value = pd.DataFrame(msf.groupby('date').apply(
lambda x: pd.Series(
get_random_forest(x, predictor_names, prediction_name),
name='rf_fvp'
))).reset_index()
# calculate mispricing signal
# NOTE(review): the assignment aligns on the integer index created by reset_index();
# this presumably relies on msf being sorted by date with a default RangeIndex -- confirm
msf['rf_fvp'] = rf_fair_value['rf_fvp']
# relative gap between predicted fair value and observed market value
msf['rf_sig'] = (msf['rf_fvp'] - msf['mktval']) / msf['mktval']
# rank stocks in quintiles based on the signal quantiles
_quintile_labels = ['Q{}'.format(i) for i in range(1, 6)]


def _to_quintiles(signal):
    """Bucket one date's signals into five equal-sized bins labelled Q1..Q5."""
    # ranking with method='first' breaks ties so that qcut always finds five distinct bin edges
    return pd.qcut(signal.rank(method='first'), 5, labels=_quintile_labels)


# same ranking for each model: signal column -> quintile column
for _signal_col, _quintile_col in (
    ('linear_sig', 'linear_quintiles'),
    ('lasso_sig', 'lasso_quintiles'),
    ('rf_sig', 'rf_quintiles'),
):
    msf[_quintile_col] = msf.groupby('date', group_keys=False)[_signal_col].apply(_to_quintiles)

# add portfolio value
# (price times next month's return, i.e. the dollar change per share)
msf['asset_price'] = msf['PRC'] * msf['next_Ret']
# store required variables for portfolio construction
portfolio_labs = ['nextmonth', 'CUSIP', 'next_Ret', 'asset_price']
msf.head()
| date | CUSIP | PRC | SHROUT | RET | nextmonth | next_Ret | atq | dvpq | seqq | ... | linear_fvp | linear_sig | lasso_fvp | lasso_sig | rf_fvp | rf_sig | linear_quintiles | lasso_quintiles | rf_quintiles | asset_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1987-03-31 | 00036110 | 33.50 | 9099.0 | 0.107438 | 1987-04-30 | -0.111940 | 228.106 | 0.000 | 130.427 | ... | 456880.662951 | 0.498871 | 447478.401169 | 0.468026 | 291902.902475 | -0.042365 | Q3 | Q2 | Q2 | -3.749990 |
| 1 | 1987-03-31 | 10304310 | 38.25 | 9158.0 | 0.145522 | 1987-04-30 | -0.049020 | 163.403 | 0.000 | 112.860 | ... | 408503.015260 | 0.166174 | 472120.883363 | 0.347787 | 448585.047065 | 0.280598 | Q2 | Q2 | Q3 | -1.875015 |
| 2 | 1987-03-31 | 89051610 | 61.25 | 4418.0 | -0.020349 | 1987-04-30 | -0.089796 | 79.886 | 0.000 | 64.583 | ... | 275588.243491 | 0.018425 | 357723.212734 | 0.321951 | 276894.497616 | 0.023252 | Q2 | Q2 | Q2 | -5.500005 |
| 3 | 1987-03-31 | 89109210 | 31.00 | 6633.0 | -0.038610 | 1987-04-30 | -0.032258 | 261.411 | 0.296 | 74.113 | ... | 330098.489303 | 0.605358 | 374531.419583 | 0.821447 | 284665.347578 | 0.384404 | Q3 | Q3 | Q4 | -0.999998 |
| 4 | 1987-03-31 | 89190610 | 26.00 | 15916.0 | -0.223881 | 1987-04-30 | -0.028846 | 27.390 | 0.000 | 24.649 | ... | 146818.190213 | -0.645209 | 256074.151786 | -0.381188 | 161779.517648 | -0.609054 | Q1 | Q1 | Q1 | -0.749996 |
5 rows × 45 columns
# slice worst and best performers to build portfolios
# Q1 holds the lowest-ranked OLS signals of each date, Q5 the highest-ranked
linear_q1 = msf.loc[msf.linear_quintiles == 'Q1'].loc[:, portfolio_labs + ['linear_sig']].copy()
linear_q5 = msf.loc[msf.linear_quintiles == 'Q5'].loc[:, portfolio_labs + ['linear_sig']].copy()
# create signal-weighted returns
# each stock's next-month return (and dollar change) is multiplied by its own signal
linear_q1['sig_ret'] = linear_q1['linear_sig'] * linear_q1['next_Ret']
linear_q5['sig_ret'] = linear_q5['linear_sig'] * linear_q5['next_Ret']
# NOTE(review): 'sig_price' is not read again in the visible part of the file
linear_q1['sig_price'] = linear_q1['linear_sig'] * linear_q1['asset_price']
linear_q5['sig_price'] = linear_q5['linear_sig'] * linear_q5['asset_price']
# equal-weighted portfolio monthly returns
# simple cross-sectional mean per holding month
lin_q1_rets = linear_q1.groupby('nextmonth', as_index=False).agg(
{'next_Ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'next_Ret': 'Q1_returns', 'asset_price': 'Q1_values'})
lin_q5_rets = linear_q5.groupby('nextmonth', as_index=False).agg(
{'next_Ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'next_Ret': 'Q5_returns', 'asset_price': 'Q5_values'})
# keep only the months present in both quintile portfolios
linear_eqw = lin_q1_rets.merge(lin_q5_rets, on='nextmonth', how='inner').set_index('nextmonth')
# strategy = Q5 minus Q1 (difference of the two portfolio means)
linear_eqw['Strategy'] = linear_eqw['Q5_returns'] - linear_eqw['Q1_returns']
linear_eqw['Strategy_Values'] = linear_eqw['Q5_values'] - linear_eqw['Q1_values']
# cumulative equal-weighted returns
# compounded monthly returns, expressed as a net cumulative return
linear_eqw['Q1_cumulative'] = (linear_eqw['Q1_returns'] + 1).cumprod() - 1
linear_eqw['Q5_cumulative'] = (linear_eqw['Q5_returns'] + 1).cumprod() - 1
linear_eqw['Strategy_Cumulative'] = (linear_eqw['Strategy'] + 1).cumprod() - 1
# notebook preview
linear_eqw.head()
| Q1_returns | Q1_values | Q5_returns | Q5_values | Strategy | Strategy_Values | Q1_cumulative | Q5_cumulative | Strategy_Cumulative | |
|---|---|---|---|---|---|---|---|---|---|
| nextmonth | |||||||||
| 1987-04-30 | -0.020665 | -0.623214 | -0.023487 | -0.195654 | -0.002822 | 0.427560 | -0.020665 | -0.023487 | -0.002822 |
| 1987-05-29 | 0.003433 | 0.102336 | -0.003725 | 0.024674 | -0.007159 | -0.077662 | -0.017302 | -0.027125 | -0.009961 |
| 1987-06-30 | 0.014769 | 0.724104 | 0.029162 | 0.326602 | 0.014393 | -0.397502 | -0.002789 | 0.001246 | 0.004288 |
| 1987-07-31 | 0.047469 | 1.677297 | 0.030508 | 0.383517 | -0.016961 | -1.293780 | 0.044548 | 0.031792 | -0.012745 |
| 1987-08-31 | 0.022547 | 1.081732 | 0.020449 | 0.186669 | -0.002098 | -0.895063 | 0.068099 | 0.052892 | -0.014816 |
# signal-weighted monthly returns
# mean of (signal * return) per holding month; the weights are the raw signals and
# are not rescaled to sum to one
# NOTE(review): the '*_values' columns average 'asset_price', not 'sig_price' -- confirm intended
lin_q1_sig_rets = linear_q1.groupby('nextmonth', as_index=False).agg(
{'sig_ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'sig_ret': 'Q1_returns', 'asset_price': 'Q1_values'})
lin_q5_sig_rets = linear_q5.groupby('nextmonth', as_index=False).agg(
{'sig_ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'sig_ret': 'Q5_returns', 'asset_price': 'Q5_values'})
# keep only the months present in both quintile portfolios
linear_sgw = lin_q1_sig_rets.merge(lin_q5_sig_rets, on='nextmonth', how='inner').set_index('nextmonth')
# strategy = Q5 minus Q1
linear_sgw['Strategy'] = linear_sgw['Q5_returns'] - linear_sgw['Q1_returns']
linear_sgw['Strategy_Values'] = linear_sgw['Q5_values'] - linear_sgw['Q1_values']
# cumulative signal-weighted returns
linear_sgw['Q1_cumulative'] = (linear_sgw['Q1_returns'] + 1).cumprod() - 1
linear_sgw['Q5_cumulative'] = (linear_sgw['Q5_returns'] + 1).cumprod() - 1
linear_sgw['Strategy_Cumulative'] = (linear_sgw['Strategy'] + 1).cumprod() - 1
# plot the cumulative series: columns from position 6 onward are the three
# '*_cumulative' columns created last
lin_eqw_fig = time_series(df=linear_eqw.loc[:, linear_eqw.columns[6:]], ttl='OLS Equal-Weighted')
lin_eqw_fig.show(renderer='notebook')
lin_sgw_fig = time_series(df=linear_sgw.loc[:, linear_sgw.columns[6:]], ttl='OLS Signal-Weighted')
lin_sgw_fig.show(renderer='notebook')
# slice the lowest (Q1) and highest (Q5) post-LASSO signal quintiles
lasso_q1 = msf.loc[msf.lasso_quintiles == 'Q1'].loc[:, portfolio_labs + ['lasso_sig']].copy()
lasso_q5 = msf.loc[msf.lasso_quintiles == 'Q5'].loc[:, portfolio_labs + ['lasso_sig']].copy()
# signal-weighted return and dollar change of each stock
lasso_q1['sig_ret'] = lasso_q1['next_Ret'] * lasso_q1['lasso_sig']
lasso_q5['sig_ret'] = lasso_q5['next_Ret'] * lasso_q5['lasso_sig']
# NOTE(review): 'sig_price' is not read again in the visible part of the file
lasso_q1['sig_price'] = lasso_q1['lasso_sig'] * lasso_q1['asset_price']
lasso_q5['sig_price'] = lasso_q5['lasso_sig'] * lasso_q5['asset_price']
# equal-weighted portfolio returns
# simple cross-sectional mean per holding month
lasso_q1_rets = lasso_q1.groupby('nextmonth', as_index=False).agg(
{'next_Ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'next_Ret': 'Q1_returns', 'asset_price': 'Q1_values'})
lasso_q5_rets = lasso_q5.groupby('nextmonth', as_index=False).agg(
{'next_Ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'next_Ret': 'Q5_returns', 'asset_price': 'Q5_values'})
# keep only the months present in both quintile portfolios
lasso_rets = lasso_q1_rets.merge(lasso_q5_rets, on='nextmonth', how='inner').set_index('nextmonth')
# strategy = Q5 minus Q1
lasso_rets['Strategy'] = lasso_rets['Q5_returns'] - lasso_rets['Q1_returns']
lasso_rets['Strategy_Values'] = lasso_rets['Q5_values'] - lasso_rets['Q1_values']
# signal-weighted portfolio returns
# mean of (signal * return) per holding month; weights are the raw signals
lasso_q1_sig_rets = lasso_q1.groupby('nextmonth', as_index=False).agg(
{'sig_ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'sig_ret': 'Q1_returns', 'asset_price': 'Q1_values'})
lasso_q5_sig_rets = lasso_q5.groupby('nextmonth', as_index=False).agg(
{'sig_ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'sig_ret': 'Q5_returns', 'asset_price': 'Q5_values'})
lasso_sig_rets = lasso_q1_sig_rets.merge(lasso_q5_sig_rets, on='nextmonth', how='inner').set_index('nextmonth')
lasso_sig_rets['Strategy'] = lasso_sig_rets['Q5_returns'] - lasso_sig_rets['Q1_returns']
lasso_sig_rets['Strategy_Values'] = lasso_sig_rets['Q5_values'] - lasso_sig_rets['Q1_values']
# cumulative returns for equal-weighted portfolio
lasso_rets['Q1_cumulative'] = (lasso_rets['Q1_returns'] + 1).cumprod() - 1
lasso_rets['Q5_cumulative'] = (lasso_rets['Q5_returns'] + 1).cumprod() - 1
lasso_rets['Strategy_Cumulative'] = (lasso_rets['Strategy'] + 1).cumprod() - 1
# cumulative returns for signal-weighted portfolio
lasso_sig_rets['Q1_cumulative'] = (lasso_sig_rets['Q1_returns'] + 1).cumprod() - 1
lasso_sig_rets['Q5_cumulative'] = (lasso_sig_rets['Q5_returns'] + 1).cumprod() - 1
lasso_sig_rets['Strategy_Cumulative'] = (lasso_sig_rets['Strategy'] + 1).cumprod() - 1
# plot the cumulative series: columns from position 6 onward are the three
# '*_cumulative' columns created last
lasso_fig = time_series(df=lasso_rets.loc[:, lasso_rets.columns[6:]], ttl='OLS Post-LASSO Equal-Weighted')
lasso_fig.show(renderer='notebook')
lasso_sig_fig = time_series(df=lasso_sig_rets.loc[:, lasso_sig_rets.columns[6:]], ttl='OLS Post-LASSO Signal-Weighted')
lasso_sig_fig.show(renderer='notebook')
# slice the lowest (Q1) and highest (Q5) random forest signal quintiles
rf_q1 = msf.loc[msf.rf_quintiles == 'Q1'].loc[:, portfolio_labs + ['rf_sig']].copy()
rf_q5 = msf.loc[msf.rf_quintiles == 'Q5'].loc[:, portfolio_labs + ['rf_sig']].copy()
# signal-weighted return and dollar change of each stock
rf_q1['sig_ret'] = rf_q1['next_Ret'] * rf_q1['rf_sig']
rf_q5['sig_ret'] = rf_q5['next_Ret'] * rf_q5['rf_sig']
# NOTE(review): 'sig_price' is not read again in the visible part of the file
rf_q1['sig_price'] = rf_q1['rf_sig'] * rf_q1['asset_price']
rf_q5['sig_price'] = rf_q5['rf_sig'] * rf_q5['asset_price']
# equal-weighted portfolio returns
# simple cross-sectional mean per holding month
rf_q1_rets = rf_q1.groupby('nextmonth', as_index=False).agg(
{'next_Ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'next_Ret': 'Q1_returns', 'asset_price': 'Q1_values'})
rf_q5_rets = rf_q5.groupby('nextmonth', as_index=False).agg(
{'next_Ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'next_Ret': 'Q5_returns', 'asset_price': 'Q5_values'})
# keep only the months present in both quintile portfolios
rf_rets = rf_q1_rets.merge(rf_q5_rets, on='nextmonth', how='inner').set_index('nextmonth')
# strategy = Q5 minus Q1
rf_rets['Strategy'] = rf_rets['Q5_returns'] - rf_rets['Q1_returns']
rf_rets['Strategy_Values'] = rf_rets['Q5_values'] - rf_rets['Q1_values']
# signal-weighted portfolio returns
# mean of (signal * return) per holding month; weights are the raw signals
rf_q1_sig_rets = rf_q1.groupby('nextmonth', as_index=False).agg(
{'sig_ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'sig_ret': 'Q1_returns', 'asset_price': 'Q1_values'})
rf_q5_sig_rets = rf_q5.groupby('nextmonth', as_index=False).agg(
{'sig_ret': 'mean', 'asset_price': 'mean'}
).rename(columns={'sig_ret': 'Q5_returns', 'asset_price': 'Q5_values'})
rf_sig_rets = rf_q1_sig_rets.merge(rf_q5_sig_rets, on='nextmonth', how='inner').set_index('nextmonth')
rf_sig_rets['Strategy'] = rf_sig_rets['Q5_returns'] - rf_sig_rets['Q1_returns']
rf_sig_rets['Strategy_Values'] = rf_sig_rets['Q5_values'] - rf_sig_rets['Q1_values']
# cumulative returns for equal-weighted portfolio
rf_rets['Q1_cumulative'] = (rf_rets['Q1_returns'] + 1).cumprod() - 1
rf_rets['Q5_cumulative'] = (rf_rets['Q5_returns'] + 1).cumprod() - 1
rf_rets['Strategy_Cumulative'] = (rf_rets['Strategy'] + 1).cumprod() - 1
# cumulative returns for signal-weighted portfolio
rf_sig_rets['Q1_cumulative'] = (rf_sig_rets['Q1_returns'] + 1).cumprod() - 1
rf_sig_rets['Q5_cumulative'] = (rf_sig_rets['Q5_returns'] + 1).cumprod() - 1
rf_sig_rets['Strategy_Cumulative'] = (rf_sig_rets['Strategy'] + 1).cumprod() - 1
# plot the cumulative series: columns from position 6 onward are the three
# '*_cumulative' columns created last
rf_rets_fig = time_series(df=rf_rets.loc[:, rf_rets.columns[6:]], ttl='Random Forest Equal-Weighted')
rf_rets_fig.show(renderer='notebook')
rf_sig_fig = time_series(df=rf_sig_rets.loc[:, rf_sig_rets.columns[6:]], ttl='Random Forest Signal-Weighted')
rf_sig_fig.show(renderer='notebook')
# set inputs to compare strategy return distribution
# raw arrays of the monthly Q5-minus-Q1 strategy returns, in the order OLS, post-LASSO, random forest
eqw_rets = [linear_eqw['Strategy'].values, lasso_rets['Strategy'].values, rf_rets['Strategy'].values]
sgw_rets = [linear_sgw['Strategy'].values, lasso_sig_rets['Strategy'].values, rf_sig_rets['Strategy'].values]
# legend labels and colors follow the same model order as the arrays above
ret_labels = ['{} Strategy Returns'.format(model) for model in ['OLS', 'OLS Post-LASSO', 'Random Forest']]
colors = ['#A569BD', '#F1C40F', '#273746']
# equal-weighted returns
# side-by-side frame of the three strategies, aligned on the 'nextmonth' index
df_rets = pd.concat(
[
linear_eqw.loc[:, 'Strategy'],
lasso_rets.loc[:, 'Strategy'],
rf_rets.loc[:, 'Strategy']
],
axis=1
)
# all three inputs are named 'Strategy', so give each column a model-specific name
df_rets.columns = ['{}_strategy'.format(method) for method in ['linear', 'lasso', 'rf']]
# signal-weighted returns
df_sig_rets = pd.concat(
[
linear_sgw.loc[:, 'Strategy'],
lasso_sig_rets.loc[:, 'Strategy'],
rf_sig_rets.loc[:, 'Strategy']
],
axis=1
)
df_sig_rets.columns = ['{}_strategy'.format(method) for method in ['linear', 'lasso', 'rf']]
def get_stats(df: pd.DataFrame):
    """
    Summarize every column of ``df``.

    df: frame of numeric series (one column per strategy)
    Returns the ``describe()`` table extended with two extra rows,
    'kurtosis' and 'skewness'. ``df`` itself is left unmodified.
    """
    # describe() already returns a new frame, so the extra rows never touch df
    summary = df.describe()
    extra_rows = (
        ('kurtosis', df.kurt()),
        ('skewness', df.skew()),
    )
    for row_label, row_values in extra_rows:
        summary.loc[row_label] = row_values
    return summary
def get_bar(xval: pd.Series, yval: pd.Series, n: str):
    """
    Build a single Plotly bar trace.

    xval: categories placed on the x axis
    yval: bar heights
    n: name given to the trace
    Returns the ``go.Bar`` trace (not a full figure).
    """
    trace_options = {'x': xval, 'y': yval, 'name': n}
    return go.Bar(**trace_options)
def plot_stats(df: pd.DataFrame, ttl: str):
    """
    Grouped bar chart with one bar series per column of ``df``.

    df: frame whose index holds the metric names and whose columns are the series
    ttl: figure title
    Returns the assembled ``go.Figure``.
    """
    bars = []
    for column in df.columns:
        bars.append(get_bar(xval=df.index, yval=df[column], n=column))
    chart = go.Figure(data=bars)
    layout_options = {
        'title': ttl,
        'xaxis_title': 'Metric',
        'yaxis_title': 'Value',
        'barmode': 'group',  # bars of different columns sit side by side
    }
    chart.update_layout(**layout_options)
    return chart
def get_sr(stats_df: pd.DataFrame, rf: float):
    """
    Sharpe ratio per column: (mean - rf) / std.

    stats_df: statistics table with rows labelled 'mean' and 'std'
    rf: risk-free rate, expressed on the same scale as the 'mean' row
    Returns a Series indexed by the columns of ``stats_df``.
    """
    excess_return = stats_df.loc['mean'] - rf
    volatility = stats_df.loc['std']
    return excess_return / volatility
def get_MDD(df: pd.DataFrame, col: str, window: int = 12, min_periods: int = 1):
    """
    Rolling maximum drawdown of ``df[col]``.

    As taken from https://quant.stackexchange.com/questions/18094/how-can-i-calculate-the-maximum-drawdown-mdd-in-python

    df: frame holding the series
    col: name of the column to analyse
    window: number of observations in each rolling window
    min_periods: minimum number of non-NaN observations a window needs before a
                 value is produced; the default of 1 lets the first observations
                 use an expanding window
    Returns a Series with, for every row, the smallest drawdown (value relative to
    its rolling maximum, minus one) observed within the trailing window.
    """
    series = df[col]
    # running peak of the series inside the trailing window
    rolling_peak = series.rolling(window, min_periods=min_periods).max()
    # drawdown of each observation relative to that peak (0 at a new peak, negative below it)
    drawdown = series / rolling_peak - 1.0
    # worst drawdown inside the trailing window; min_periods is honoured here as well
    # (it used to be hard-coded to 1, which silently ignored the argument)
    return drawdown.rolling(window, min_periods=min_periods).min()
# distribution of the monthly strategy returns, one curve per model
eqw_distrib = plot_distribution(data=eqw_rets, labels=ret_labels, colors=colors, title='Equal-Weighted Portfolios Strategy Returns')
eqw_distrib.show(renderer='notebook')
sgw_distrib = plot_distribution(data=sgw_rets, labels=ret_labels, colors=colors, title='Signal-Weighted Portfolios Strategy Returns')
sgw_distrib.show(renderer='notebook')
# equal-weighted statistics
# describe() table plus kurtosis and skewness rows
eqw_stats = get_stats(df_rets)
# notebook display of the table
eqw_stats
| linear_strategy | lasso_strategy | rf_strategy | |
|---|---|---|---|
| count | 310.000000 | 310.000000 | 310.000000 |
| mean | 0.007734 | 0.005971 | 0.007340 |
| std | 0.031684 | 0.045262 | 0.034602 |
| min | -0.178231 | -0.331893 | -0.158035 |
| 25% | -0.009192 | -0.011552 | -0.010823 |
| 50% | 0.006142 | 0.006095 | 0.006415 |
| 75% | 0.022721 | 0.021554 | 0.024474 |
| max | 0.158573 | 0.261344 | 0.190178 |
| kurtosis | 7.584601 | 16.988515 | 5.997929 |
| skewness | 0.152737 | -0.789510 | 0.252297 |
# signal-weighted statistics
# describe() table plus kurtosis and skewness rows
sgw_stats = get_stats(df_sig_rets)
# notebook display of the table
sgw_stats
| linear_strategy | lasso_strategy | rf_strategy | |
|---|---|---|---|
| count | 310.000000 | 310.000000 | 310.000000 |
| mean | 0.081437 | 0.088887 | 0.022604 |
| std | 0.422306 | 0.487088 | 0.162808 |
| min | -1.397730 | -2.934913 | -0.542772 |
| 25% | -0.110666 | -0.142098 | -0.048055 |
| 50% | 0.084073 | 0.112983 | 0.030816 |
| 75% | 0.275032 | 0.348066 | 0.093782 |
| max | 1.689902 | 2.369476 | 0.742762 |
| kurtosis | 2.424560 | 6.540261 | 3.064944 |
| skewness | -0.097280 | -0.787546 | -0.206759 |
# store stats to be plotted
# only the mean, standard deviation and median rows are charted
stats_labs = ['mean', 'std', '50%']
eqw_stats_fig = plot_stats(eqw_stats.loc[eqw_stats.index.isin(stats_labs)], 'Portfolio Descriptive Statistics')
eqw_stats_fig.show(renderer='notebook')
# TODO: Need to plot this:
# Sharpe ratio per strategy = (mean - rf) / std, taken from the stats tables
# NOTE(review): rf=0.0416 looks like an annual rate while the stats describe monthly
# returns -- confirm the two are on the same scale
eq_sharpe = pd.DataFrame(get_sr(eqw_stats, rf=0.0416), columns=['Sharpe_Ratio'])
eqs_fig = plot_stats(eq_sharpe, 'Equal-Weighted Sharpe Ratios')
eqs_fig.show(renderer='notebook')
sgw_sharpe = pd.DataFrame(get_sr(sgw_stats, rf=0.0416), columns=['Sharpe_Ratio'])
sgs_fig = plot_stats(sgw_sharpe, 'Signal Weighted Sharpe Ratios')
sgs_fig.show(renderer='notebook')
# get MDD for equal weighted portfolios
# NOTE(review): the drawdown is computed on 'Strategy_Values' (Q5 minus Q1 mean dollar
# change), which can be negative or near zero; confirm a cumulative value series was
# not intended instead
eqw_mdd = pd.DataFrame(index=linear_eqw.index)
eqw_mdd['linear_MDD'] = get_MDD(linear_eqw, 'Strategy_Values')
eqw_mdd['lasso_MDD'] = get_MDD(lasso_rets, 'Strategy_Values')
eqw_mdd['rf_MDD'] = get_MDD(rf_rets, 'Strategy_Values')
# time_series appends ' Monthly Cumulative Portfolio Value' to the title passed here
eqw_mdd_fig = time_series(eqw_mdd, 'Equal Weighted Max Drawdown')
eqw_mdd_fig.show(renderer='notebook')
# get signal weighted max drawdown
sgw_mdd = pd.DataFrame(index=linear_sgw.index)
sgw_mdd['linear_MDD'] = get_MDD(linear_sgw, 'Strategy_Values')
sgw_mdd['lasso_MDD'] = get_MDD(lasso_sig_rets, 'Strategy_Values')
sgw_mdd['rf_MDD'] = get_MDD(rf_sig_rets, 'Strategy_Values')
sgw_mdd_fig = time_series(sgw_mdd, 'Signal Weighted Max Drawdown')
sgw_mdd_fig.show(renderer='notebook')